{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "pycharm": {
     "is_executing": false
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import cv2 as cv2\n",
    "from shutil import copyfile\n",
    "from random import shuffle\n",
    "import pathlib\n",
    "\n",
    "# Dataset name must be valid as a filename/path\n",
    "dataset_name = \"squirrels\"\n",
    "class_id = '/m/071qp'\n",
    "\n",
    "# Proportion of examples which will go in train\n",
    "# For example, 0.75 reserves 75% of images for training, 25% for validation\n",
    "tv_split = 0.75\n",
    "\n",
    "# Make the directories we will need to store the dataset\n",
    "pathlib.Path(\"%s/all/images\" % dataset_name).mkdir(parents=True, exist_ok=True)\n",
    "pathlib.Path(\"%s/all/labels\" % dataset_name).mkdir(parents=True, exist_ok=True)\n",
    "pathlib.Path(\"%s/train/images\" % dataset_name).mkdir(parents=True, exist_ok=True)\n",
    "pathlib.Path(\"%s/train/labels\" % dataset_name).mkdir(parents=True, exist_ok=True)\n",
    "pathlib.Path(\"%s/val/images\" % dataset_name).mkdir(parents=True, exist_ok=True)\n",
    "pathlib.Path(\"%s/val/labels\" % dataset_name).mkdir(parents=True, exist_ok=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Images 1810\n",
      "Total BBoxes: 1940\n"
     ]
    }
   ],
   "source": [
    "# Load BBoxes\n",
    "df_bbox = pd.read_csv('train-annotations-bbox.csv')\n",
    "\n",
    "# Filter by class\n",
    "bboxes = df_bbox[df_bbox['LabelName'].str.contains(class_id)]\n",
    "unique_images = bboxes.groupby(['ImageID'])\n",
    "\n",
    "print(\"Images %d\" % len(unique_images))\n",
    "print(\"Total BBoxes: %d\" % len(bboxes))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create Labels\n",
    "def create_label(row):\n",
    "    \n",
    "    if row['IsOccluded'] == -1:\n",
    "        return None\n",
    "    \n",
    "    image = cv2.imread('train/%s.jpg' % row['ImageID'])\n",
    "    \n",
    "    left = row['XMin'] * image.shape[1]\n",
    "    top = row['YMin'] * image.shape[0]\n",
    "    right = row['XMax'] * image.shape[1]\n",
    "    bot = row['YMax'] * image.shape[0]\n",
    "    \n",
    "    \n",
    "    if image.shape[0] > 1024 or image.shape[1] > 1024:\n",
    "        return None\n",
    "    \n",
    "    fields = []\n",
    "    fields.append(dataset_name)\n",
    "    fields.append('0.0')\n",
    "    \n",
    "    fields.append(str(row['IsOccluded']))\n",
    "    fields.append('0.0')\n",
    "    fields.append(\"%.2f\" % left)\n",
    "    fields.append(\"%.2f\" % top)\n",
    "    fields.append(\"%.2f\" % right)\n",
    "    fields.append(\"%.2f\" % bot)\n",
    "    fields.append('0.0')\n",
    "    fields.append('0.0')\n",
    "    fields.append('0.0')\n",
    "    fields.append('0.0')\n",
    "    fields.append('0.0')\n",
    "    fields.append('0.0')\n",
    "    fields.append('0.0')\n",
    "    \n",
    "    return \" \".join(fields)\n",
    "\n",
    "image_dict = {}\n",
    "\n",
    "for index, row in bboxes.iterrows():\n",
    "    l = create_label(row)\n",
    "    \n",
    "    if l is None:\n",
    "        continue\n",
    "    \n",
    "    if row['ImageID'] not in image_dict:\n",
    "        image_dict[row['ImageID']] = [l]\n",
    "    else:\n",
    "        image_dict[row['ImageID']].append(l)\n",
    "\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Copy files to \"all\" folder\n",
    "for image_id in image_dict:\n",
    "        all_image_path = '%s/all/images/%s.jpg' % (dataset_name, image_id)\n",
    "        copyfile('train/%s.jpg' % image_id, all_image_path)\n",
    "\n",
    "        all_label_path = '%s/all/labels/%s.txt' % (dataset_name, image_id)\n",
    "        all_label_data = \"\\n\".join(image_dict[image_id])\n",
    "        \n",
    "        open(all_label_path, 'w').write(all_label_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create train/validation split and copy files there\n",
    "images = [[img, \n",
    "           \"%s/all/images/%s.jpg\" % (dataset_name, img), \n",
    "           \"%s/all/labels/%s.txt\" % (dataset_name, img)] for img in image_dict]\n",
    "\n",
    "shuffle(images)\n",
    "\n",
    "train = images[0:int(len(images)*tv_split)]\n",
    "val = images[int(len(images)*tv_split):]\n",
    "\n",
    "for iid, image, label in train:\n",
    "    copyfile(image, '%s/train/images/%s.jpg' % (dataset_name, iid))\n",
    "    copyfile(label, '%s/train/labels/%s.txt' % (dataset_name, iid))\n",
    "\n",
    "\n",
    "for iid, image, label in val:\n",
    "    copyfile(image, '%s/val/images/%s.jpg' % (dataset_name, iid))\n",
    "    copyfile(label, '%s/val/labels/%s.txt' % (dataset_name, iid))\n",
    "\n",
    "\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  },
  "pycharm": {
   "stem_cell": {
    "cell_type": "raw",
    "source": [],
    "metadata": {
     "collapsed": false
    }
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}